Aminet 24

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 24 / Aminet 24 (1998)(GTI - Schatztruhe)[!][Apr 1998].iso / Aminet / dev / c / chunky.lha / lib_chunky / lib-src / c2p.asm next >

Wrap

Assembly Source File | 1998-02-25 | 18KB | 995 lines

; 100% systemfriendly ChunkyToPlanar converter for use with intuition
; screens in applications/games/whatever.
;
; Coded in 1994 by Morten Eriksen.
; Reach me through email: mortene@stud.unit.no.
;
; Use and modify as you like - give credit where appropriate.
;
; Timings on my A1200 with 68EC020 and 32-bit fastram:
;
; Testcase  Dimensions      This routine                 C='s WritePixelArray8
;
;    1      320x256x5     -  15 frames (0.30 seconds)     33 frames
;    2      320x256x8     -  16 frames (0.32 seconds)     51 frames
;    3      640x512x4     -  63 frames (1.26 seconds)    100 frames
;    4      1024x1024x1   - 194 frames (3.88 seconds)    101 frames
;    5      752x578x8     -  88 frames (1.76 seconds)    250 frames
;
; Please help me speed up this sucker - the main bottleneck is the
; 'convert32pixels' subroutine (e.g. in the 3rd testcase, 60 out of
; 63 frames are spent in this routine). As you can see, it gets worse
; with fewer bitplanes (compared to C='s routine) - but on 8-bit bitmaps
; it's about 3 times as fast. If you improve it, be sure to repost the
; improved version to Usenet, or send it to me through email.
;
;------------------------------------------------------------------------------

        xdef    _ChunkyToPlanarAsm

;------------------------------------------------------------------------------
; This routine takes a buffer of chunky bytes and transforms it into
; planar data, which is inserted directly into the destination BitMap.
;
; As a replacement for C='s WritePixelArray8 routine, this one is on average
; about twice as fast and works directly on bitmaps (instead of RastPorts).
; All cases handled (any width and height > 0, no alignment restrictions).
; Works on OCS/ECS/AGA, all Kickstarts and any MC680x0 CPU.
; The downside is that it does not do clipping and does not work with
; interleaved bitmaps (support for interleaved bitmaps should be a piece of
; cake to implement, though).
;------------------------------------------------------------------------------
;
; C interface:
;
;   extern void __asm ChunkyToPlanarAsm(register __a0 struct c2pStruct *);
;
;   struct c2pStruct
;   {
;     struct BitMap *bmap;
;     UWORD startX, startY, width, height;
;     UBYTE *chunkybuffer;
;   } c2p;
;
;   c2p.bmap = mybitmap;
;   c2p.startX = x0;
;   c2p.startY = y0;
;   c2p.width = x1 - x0 + 1;
;   c2p.height = y1 - y0 + 1;
;   c2p.chunkybuffer = chunkybytes;
;
;   ChunkyToPlanarAsm(&c2p);
;   CopySBitMap(mywindow->RPort->Layer);
;------------------------------------------------------------------------------
;
; Assembler interface:
;
; In:    a0 - c2p struct.
; Out:   Nothing.
; Saved: d2-d7/a2-a6 (pushed on entry, popped on exit).
; Trash: d0/d1/a1 are modified by the code below.
;
; A request with width == 0 or height == 0 returns immediately without
; touching the bitmap.
; NOTE(review): BitMap Depth is assumed to be 1..8 - it is used unchecked
; to index the insertpointers tables; confirm callers guarantee this.
;------------------------------------------------------------------------------

; ** BitMap struct **
BytesPerRow     EQU     0       ; UWORD
Rows            EQU     2       ; UWORD
Flags           EQU     4       ; UBYTE
Depth           EQU     5       ; UBYTE
Pad             EQU     6       ; UWORD
Planes          EQU     8       ; PLANEPTRs [8]

; ** c2p struct **
bmap            EQU     0       ; struct BitMap *
startX          EQU     4       ; UWORD
startY          EQU     6       ; UWORD
width           EQU     8       ; UWORD
height          EQU     10      ; UWORD
chunkybuffer    EQU     12      ; UBYTE *

;------------------------------------------------------------------------------

        Section code,CODE

_ChunkyToPlanarAsm:
        movem.l d2-d7/a2-a6,-(sp)

; An empty frame must not enter the row loop: with height == 0 the row
; counter would wrap to $FFFF, and with width == 0 the single-byte mask
; computation below would shift by -1.
        tst.w   width(a0)
        beq.w   c2pexit
        tst.w   height(a0)
        beq.w   c2pexit

; The bit-spread table only needs to be built once.
; NOTE(review): make_table is defined elsewhere; presumably it sets
; madetable and leaves a0 intact - confirm.
        tst.b   madetable
        bne.s   table_made
        bsr.w   make_table
table_made

; --- left edge ---------------------------------------------------------
; leadingbits = number of pixels before the first byte boundary
;             = 0 if startX is byte aligned, else 8 - (startX & 7)
; andval_lead = $ff << leadingbits (low `leadingbits` bits cleared)
        moveq   #0,d0
        move.w  startX(a0),d0
        andi.w  #%111,d0                ; sets Z when already byte aligned
        beq.s   leadno
        move.w  d0,d1
        moveq   #8,d0
        sub.w   d1,d0                   ; d0 = 8 - (startX & 7)
        move.b  #$ff,d1
        lsl.b   d0,d1
        move.b  d1,andval_lead
leadno
        move.w  d0,leadingbits

        move.w  width(a0),d0
        cmp.w   leadingbits,d0
        bhs.s   notsingle

; --- whole frame fits inside one destination byte ------------------------
; Build a keep-mask with zeroes exactly where the `width` new pixels go:
;   $80 asr (width-1)        -> `width` one-bits starting at bit 7
;   lsr (8 - leadingbits)    -> moved right to the start pixel (startX & 7)
;   not                      -> invert into a keep-mask
; andval_single is non-zero here and doubles as the "single byte" flag.
        move.b  #$80,d1
        subq.w  #1,d0
        asr.b   d0,d1
        addq.w  #1,d0
        moveq   #8,d2
        sub.w   leadingbits,d2
        lsr.b   d2,d1
        not.b   d1
        move.b  d1,andval_single
        clr.w   trailingbits
        bra.s   single

notsingle
        clr.b   andval_single           ; flag: normal (multi-byte) case

; --- right edge ----------------------------------------------------------
; trailingbits = (width - leadingbits) & 7
; andval_trail = $ff >> trailingbits (high `trailingbits` bits cleared)
        sub.w   leadingbits,d0
        move.w  d0,d1
        andi.w  #$fff8,d1
        sub.w   d1,d0                   ; d0 = remainder modulo 8
        move.w  d0,trailingbits
        move.b  #$ff,d1
        lsr.b   d0,d1
        move.b  d1,andval_trail

single
; --- per-call variables ----------------------------------------------------
;   Width / GWidth = width - leadingbits - trailingbits (byte-aligned pixels
;                    per row; GWidth is the copy Width is reloaded from)
;   Modulo         = bytes added to the plane offset at the end of each row
        move.l  bmap(a0),a1
        move.l  chunkybuffer(a0),a3
        moveq   #0,d0
        move.b  Depth(a1),d0
        move.w  d0,depth
        move.w  height(a0),Height
        move.w  width(a0),Width
        move.w  leadingbits,d1
        sub.w   d1,Width
        move.w  trailingbits,d1
        sub.w   d1,Width
        move.w  Width,GWidth

        move.w  BytesPerRow(a1),Modulo
        tst.b   andval_single
        bne.s   singlebyte              ; single-byte case: no aligned bytes
        move.w  Width,d0
        lsr.w   #3,d0
        sub.w   d0,Modulo               ; minus the byte-aligned bytes
singlebyte
        tst.w   leadingbits
        beq.s   nolead
        subq.w  #1,Modulo               ; minus the partial byte at the left
nolead
        tst.w   trailingbits
        beq.s   notrail
        subq.w  #1,Modulo               ; minus the partial byte at the right
notrail

; --- initial byte offset into each plane ---------------------------------
; a2 = startX / 8 + startY * BytesPerRow
; (one 16x16->32 unsigned multiply instead of adding BytesPerRow startY times)
        moveq   #0,d0
        move.w  startX(a0),d0
        lsr.w   #3,d0
        move.w  startY(a0),d1
        mulu.w  BytesPerRow(a1),d1
        add.l   d1,d0
        move.l  d0,a2

; --- pick the insertion entry points for this bitmap depth ---------------
; planepointerspointer = address just past Planes[depth-1]; the insert
; routines walk it downwards with -(a6).
; insertpointer / insertpointerlong = entry depth-1 of the insertpointers /
; insertpointerslong tables (tables are defined elsewhere in the file).
        lea     Planes(a1),a6
        lea     insertpointerslong,a4
        lea     insertpointers,a5
        moveq   #0,d7
        move.w  depth,d7
        lsl.w   #2,d7                   ; depth * 4 (size of one pointer)
        add.l   d7,a6
        move.l  a6,planepointerspointer
        subq.w  #4,d7                   ; (depth - 1) * 4
        add.l   d7,a4
        add.l   d7,a5
        move.l  (a4),insertpointerlong
        move.l  (a5),insertpointer

        lea     c2p,a4

; Register roles inside the row loop:
;   a0 - c2p struct, a1 - bmap,
;   a2 - offset from Planes pointer, a3 - chunkybuffer
;   a4 - bitspread table

convertlines
; Whole width inside a single byte? Then one masked insert per row.
        tst.b   andval_single
        beq.s   testlead
        bsr.w   insertsinglebytebits
        bra.w   trailno

testlead
; Partial byte at the left edge?
        tst.w   leadingbits
        beq.s   convert8pixels
        bsr.w   insertleadingbits

convert8pixels
; Width = byte-aligned pixels still to convert in this row.
        tst.w   Width
        beq.s   rowdone

; Prefer the 32-pixel path when at least 32 pixels remain ...
        cmp.w   #32,Width
        blo.s   nolong

; ... and the destination address is even, since longword stores to an
; odd address are not possible on a plain 68000. Only plane 0 is tested.
        move.l  Planes(a1),d0
        add.l   a2,d0
        btst    #0,d0
        bne.s   nolong

        bsr.w   convert32pixels
        sub.w   #32,Width
        addq.w  #4,a2
        bra.s   convert8pixels

nolong
; 8-pixel path: accumulate two longwords, d0 and d4, one bit per pixel.
        moveq   #0,d0
        moveq   #0,d4
        moveq   #8-1,d1                 ; dbra counter for 8 pixels

convert8bytes
; Each table entry is 8 bytes (two longwords), indexed by pixel value.
        moveq   #0,d2
        move.b  (a3)+,d2
        lsl.w   #3,d2                   ; pixel * 8

; Shift the previous pixels left by one bit, then OR in this pixel.
        add.l   d0,d0
        or.l    0(a4,d2.w),d0
        addq.w  #4,d2                   ; second longword of the entry
        add.l   d4,d4
        or.l    0(a4,d2.w),d4
        dbra    d1,convert8bytes

; Split the four bytes of d0 into d3,d2,d1,d0 (low byte first) and the
; four bytes of d4 into d7,d6,d5,d4, so that dN holds the byte that
; insertdata stores through its planeN+1 entry.
        move.b  d0,d3
        lsr.w   #8,d0
        move.b  d0,d2
        swap    d0
        move.b  d0,d1
        lsr.w   #8,d0

        move.b  d4,d7
        lsr.w   #8,d4
        move.b  d4,d6
        swap    d4
        move.b  d4,d5
        lsr.w   #8,d4

; Entries below plane8 index with d7 instead of storing it, so for any
; depth other than 8 d7 must carry the plane offset.
        cmp.w   #8,depth
        beq.s   its8
        move.l  a2,d7
its8
        move.l  planepointerspointer(pc),a6
        move.l  insertpointer(pc),a5
        jsr     (a5)

        addq.w  #1,a2
        subq.w  #8,Width
        bra.w   convert8pixels

rowdone
; Partial byte at the right edge?
        tst.w   trailingbits
        beq.s   trailno
        bsr.w   inserttrailingbits

trailno
; Next row: reload the aligned width and skip to the row's first byte.
        move.w  GWidth,Width
        add.w   Modulo,a2
        subq.w  #1,Height               ; sets Z when the last row is done
        bne.w   convertlines

c2pexit
        movem.l (sp)+,d2-d7/a2-a6
        rts

;------------------------------------------------------------------------------
; insertdata - store one converted byte (8 pixels) into each bitplane.
;
; Entered through insertpointer at one of the planeN labels.
; NOTE(review): the insertpointers table is not visible here; presumably
; entry depth-1 holds the label plane<depth> - confirm.
;
; In:  a6    - address just past the highest plane pointer to use
;      a2    - byte offset into each plane
;      d0-d6 - bytes for planes 0-6 (as far as the depth needs them)
;      d7    - byte for plane 7 when entered at plane8,
;              otherwise the plane offset (same value as a2)
; Out: a5/a6 modified; d7 = plane offset
;------------------------------------------------------------------------------
insertdata:
plane8
        move.l  -(a6),a5
        add.l   a2,a5
        move.b  d7,(a5)
        move.l  a2,d7                   ; from here on d7 is the offset
plane7
        move.l  -(a6),a5
        move.b  d6,0(a5,d7.l)
plane6
        move.l  -(a6),a5
        move.b  d5,0(a5,d7.l)
plane5
        move.l  -(a6),a5
        move.b  d4,0(a5,d7.l)
plane4
        move.l  -(a6),a5
        move.b  d3,0(a5,d7.l)
plane3
        move.l  -(a6),a5
        move.b  d2,0(a5,d7.l)
plane2
        move.l  -(a6),a5
        move.b  d1,0(a5,d7.l)
plane1
        move.l  -(a6),a5
        move.b  d0,0(a5,d7.l)
        rts

;------------------------------------------------------------------------------
; insertdatalong - store one converted longword (32 pixels) into each
; bitplane. Same register convention as insertdata, with longword data
; in d0-d7 and entry labels planeNl.
; NOTE(review): the caller is not visible in this section; presumably
; convert32pixels jumps here through insertpointerlong - confirm.
;------------------------------------------------------------------------------
insertdatalong:
plane8l
        move.l  -(a6),a5
        add.l   a2,a5
        move.l  d7,(a5)
        move.l  a2,d7                   ; from here on d7 is the offset
plane7l
        move.l  -(a6),a5
        move.l  d6,0(a5,d7.l)
plane6l
        move.l  -(a6),a5
        move.l  d5,0(a5,d7.l)
plane5l
        move.l  -(a6),a5
        move.l  d4,0(a5,d7.l)
plane4l
        move.l  -(a6),a5
        move.l  d3,0(a5,d7.l)
plane3l
        move.l  -(a6),a5
        move.l  d2,0(a5,d7.l)
plane2l
        move.l  -(a6),a5
        move.l  d1,0(a5,d7.l)
plane1l
        move.l  -(a6),a5
        move.l  d0,0(a5,d7.l)
        rts

;------------------------------------------------------------------------------
insertpointer dc.l